In [1]:
%matplotlib inline
import matplotlib.pyplot as plt

from sklearn import svm

import numpy as np
import pandas as pd
import seaborn as sns
import scipy.io as sio

In [2]:
# Gaussian (RBF) kernel: exp(-||x1 - x2||^2 / (2 * sigma^2))
def gaussian_kernel(x1, x2, sigma):
    """Similarity between vectors x1 and x2 under a Gaussian kernel of width sigma."""
    squared_distance = np.power(x1 - x2, 2).sum()
    return np.exp(-squared_distance / (2 * (sigma ** 2)))

In [3]:
# Sanity-check the kernel on the example vectors; expected value ~0.32465 (see output below).
vec_a = np.array([1, 2, 1])
vec_b = np.array([0, 4, -1])
bandwidth = 2

gaussian_kernel(vec_a, vec_b, bandwidth)


Out[3]:
0.32465246735834974

load data


In [4]:
# Load the MATLAB dataset: 'X' holds the two features, 'y' the binary labels.
mat = sio.loadmat('./data/ex6data2.mat')
print(mat.keys())

data = pd.DataFrame(mat['X'], columns=['X1', 'X2'])
data['y'] = mat['y']

data.head()


dict_keys(['__globals__', 'X', '__version__', 'y', '__header__'])
Out[4]:
X1 X2 y
0 0.107143 0.603070 1
1 0.093318 0.649854 1
2 0.097926 0.705409 1
3 0.155530 0.784357 1
4 0.210829 0.866228 1

In [5]:
# (rows, cols): 863 samples, 2 feature columns plus the label column
data.shape


Out[5]:
(863, 3)

visualize data


In [6]:
# Scatter the two classes; fit_reg=False suppresses lmplot's regression line.
sns.set(context="notebook", style="white", palette=sns.diverging_palette(240, 10, n=2))
# NOTE: lmplot's x/y must be passed as keywords in modern seaborn, and the
# `size` parameter was renamed to `height` in seaborn 0.9.
sns.lmplot(x='X1', y='X2', hue='y', data=data,
           height=5,
           fit_reg=False,
           scatter_kws={"s": 10}
          )


Out[6]:
<seaborn.axisgrid.FacetGrid at 0x106c59710>

Try sklearn's built-in Gaussian (RBF) kernel


In [7]:
# Soft-margin SVM with the built-in RBF kernel.
# probability=True enables predict_proba, used later for the contour-style plot.
svc = svm.SVC(kernel='rbf', C=100, gamma=10, probability=True)
svc


Out[7]:
SVC(C=100, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma=10, kernel='rbf',
  max_iter=-1, probability=True, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [8]:
# Fit on the full dataset and report training accuracy (~0.97 per the output below).
features = data[['X1', 'X2']]
svc.fit(features, data['y'])
svc.score(features, data['y'])


Out[8]:
0.9698725376593279

`predict_proba` returns an ndarray of shape (n_samples, n_classes).
To plot the decision contour of this binary example, pick one class's column and plot its probabilities.


In [9]:
# predict_proba returns one column per class; keep column 0 (the first class).
prob_matrix = svc.predict_proba(data[['X1', 'X2']])
predict_prob = prob_matrix[:, 0]

In [10]:
# Color each point by its class-0 probability to visualize the decision boundary region.
figure, axes = plt.subplots(figsize=(8, 6))
axes.scatter(x=data['X1'], y=data['X2'], s=30, c=predict_prob, cmap='Reds')


Out[10]:
<matplotlib.collections.PathCollection at 0x1151ec978>

In [ ]: